library(readxl)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(KernSmooth)
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
library(MASS)
library(VineCopula)
## Warning: package 'VineCopula' was built under R version 4.2.3
library(corrplot)
## corrplot 0.92 loaded
library(ggplot2)
library(fitdistrplus)
## Loading required package: survival
library(copula)
## Warning: package 'copula' was built under R version 4.2.3
##
## Attaching package: 'copula'
## The following object is masked from 'package:VineCopula':
##
## pobs
library(kdecopula)
## Warning: package 'kdecopula' was built under R version 4.2.3
#install.packages("htmltools")
#update.packages()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(plot3D)
## Warning: package 'plot3D' was built under R version 4.2.3
library(moments)
library(gridExtra)
library(cubature)
## Warning: package 'cubature' was built under R version 4.2.3
# Download Apple stock prices (2012-2022); getSymbols() assigns AAPL into
# the global environment and returns the symbol name.
getSymbols("AAPL", from = "2012-01-01", to = "2022-12-31")
## [1] "AAPL"
# Download Microsoft prices; auto.assign = FALSE returns the xts object directly.
msft <- getSymbols("MSFT", from = "2012-01-01", to = "2022-12-31", auto.assign = FALSE)
# Daily log-returns of the closing prices for both series.
aapl_ret <- diff(log(Cl(AAPL)))
msft_ret <- diff(log(Cl(msft)))
data <- data.frame(Date = index(AAPL), Cl(AAPL), AAPL = aapl_ret, Cl(msft), MSFT = msft_ret)
data <- na.omit(data)
write.csv(data, file = "datasetdraft.csv", row.names = FALSE)
# Extract the two return series. The ".1" suffix comes from data.frame()
# de-duplicating the repeated "AAPL.Close"/"MSFT.Close" column names.
X <- data$AAPL.Close.1
Y <- data$MSFT.Close.1
n <- length(X)
plot(X, Y, xlab = "AAPL", ylab = "MSFT", main = "La concentration moyenne ",
col = "blue", cex = 0.5)
# Empirical marginal distribution (pseudo-observations).
#
# Maps each observation to its normalized rank. Uses rank(X) / (n + 1)
# rather than rank(X) / n so the largest observation maps strictly below 1:
# with /n the maximum gets exactly 1 and qnorm(1) = Inf, which blows up the
# Van der Waerden statistic computed later in this script. The /(n + 1)
# convention also matches the variance term (c(1:n)/(n+1)) of that test and
# the rank transform used to build data_rank.
#
# @param X numeric vector of observations.
# @return numeric vector of pseudo-observations in (0, 1), same length as X.
Marge_empirique <- function (X) {
  n <- length(X)
  rank(X) / (n + 1)
}
F_n <- Marge_empirique(X)
G_n <- Marge_empirique(Y)
#A_n <- ecdf(X)(X)
#B_n <- ecdf(Y)(Y)
# Detecting dependence
# a. Scatter plot of the raw returns and of their ranks
par(mfrow = c(1, 2))
plot(X, Y, xlab = "AAPL", ylab = "MSFT", col = "skyblue2", main = "Digramme de dispersion")
plot(F_n, G_n, xlab = "AAPL", ylab = "MSFT", col = "rosybrown2", main = "Rank-rank plot")
# The rank-rank plot of the two variables resembles that of a Student copula
# b. Chi-plot
BiCopChiPlot(F_n, G_n, mode = "NULL", ylim = c(-0.5, 0.5), main = "Khi-plot", col = "lightgreen", cex = 0.5)
# K-plot
BiCopKPlot(F_n, G_n, PLOT = TRUE, main = "K-plot", col = "slateblue")
# I. Independence tests
# The two variables are not Gaussian, so we use non-parametric (rank-based) tests.
# Spearman test
test.spearman <- cor.test(X, Y, method = "spearman", exact = FALSE)
test.spearman
##
## Spearman's rank correlation rho
##
## data: X and Y
## S = 1.584e+09, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5513852
# A strong, statistically significant positive correlation between the two variables
# Kendall test
test.kendall <- cor.test(X, Y, method = "kendall")
test.kendall
##
## Kendall's rank correlation tau
##
## data: X and Y
## z = 31.559, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.4002385
# A strong positive correlation between the two analysed variables
# Van der Waerden test
# Value of the statistic.
# NOTE(review): if the marginal transform can return exactly 1 (rank/n does),
# qnorm(1) = Inf and the statistic degenerates; the variance term below
# already uses the /(n + 1) convention — confirm the marginals match it.
Van <- 1/length(X) * sum(qnorm(F_n, 0, 1) * qnorm(G_n, 0, 1))
Var_nVan <- 1/(length(X) - 1) * (sum(qnorm(c(1: n) / (n + 1), 0, 1) ^ 2) ^ 2)
t_statisque <- Van / sqrt(Var_nVan) * n
p_value <- (1 - pnorm(t_statisque, 0, 1)) * 2
p_value#p_value very small, we reject the null hypothesis H0
## [1] 0
# All three tests indicate that the two variables are dependent
# Empirical copula density via a local box count.
#
# For each evaluation point (u, v) — u and v may be equal-length vectors —
# counts the pseudo-observations (normalized ranks of X and Y) falling in
# the square [u - step/2, u + step/2] x [v - step/2, v + step/2] and
# normalizes by n * step^2.
#
# @param X,Y numeric samples of equal length.
# @param u,v evaluation coordinates in [0, 1] (scalar or vector).
# @param step side length of the counting box.
# @return numeric vector of density estimates, one per (u, v) pair.
empirique_Copula <- function(X,Y,u,v,step){
  if(length(X) != length(Y)) stop("X and Y must have equal length")
  n <- length(X)
  half <- step / 2
  rX <- rank(X) / (n)
  rY <- rank(Y) / (n)
  # Indicator matrices: rows index evaluation points, columns index observations.
  in_u <- outer(u - half, rX, `<=`) & outer(u + half, rX, `>=`)
  in_v <- outer(v - half, rY, `<=`) & outer(v + half, rY, `>=`)
  counts <- rowSums(in_u & in_v)
  counts / (n * step * step)
}
databrut <- cbind(X,Y)
# Rank-transform each column to pseudo-observations; the /(n + 1) scaling
# keeps all values strictly inside the unit square.
data_rank <- apply(databrut, 2, rank) / (n + 1)
# Smoothed 2D density of the pseudo-observations on a 20 x 20 grid (MASS::kde2d).
kde <- kde2d(data_rank[,1], data_rank[,2], n = 20)
U1 <- seq(0,1,length.out =20)
U2 <- seq(0,1,length.out =20)
grid <- expand.grid(U1 = U1, U2 = U2)
plot_ly(x = kde$x, y = kde$y, z = kde$z, type = "surface")
# Raw empirical copula density (box-count estimator) evaluated on the grid.
z_brut <- matrix(empirique_Copula(data_rank[,1], data_rank[,2], grid$U1, grid$U2, 1/20), nrow = 20, ncol = 20)
z_brut
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 4.6259487 1.8792917 0.7228045 0.5782436 0.2891218 0.0000000 0.1445609
## [2,] 2.7466570 6.2161185 1.7347308 1.4456090 1.1564872 0.7228045 0.4336827
## [3,] 0.5782436 2.8912179 3.3249006 1.8792917 1.4456090 1.8792917 0.8673654
## [4,] 0.1445609 2.1684134 2.8912179 3.0357788 1.8792917 1.7347308 0.8673654
## [5,] 0.0000000 1.4456090 2.0238525 1.4456090 2.0238525 1.8792917 0.8673654
## [6,] 0.1445609 1.0119263 0.5782436 2.3129743 0.8673654 1.7347308 1.5901699
## [7,] 0.0000000 0.2891218 0.5782436 1.3010481 1.7347308 1.7347308 1.5901699
## [8,] 0.5782436 0.0000000 1.0119263 1.3010481 1.4456090 1.4456090 1.1564872
## [9,] 0.0000000 0.1445609 0.8673654 0.8673654 1.0119263 1.1564872 2.4575352
## [10,] 0.1445609 0.2891218 1.0119263 0.5782436 1.1564872 1.0119263 1.3010481
## [11,] 0.1445609 0.2891218 1.0119263 0.5782436 1.7347308 1.4456090 1.0119263
## [12,] 0.1445609 0.1445609 0.7228045 0.0000000 0.5782436 0.5782436 1.3010481
## [13,] 0.0000000 0.5782436 0.2891218 0.5782436 0.8673654 0.7228045 0.8673654
## [14,] 0.1445609 0.1445609 0.5782436 0.5782436 0.2891218 0.7228045 1.4456090
## [15,] 0.0000000 0.1445609 0.5782436 0.8673654 0.5782436 0.4336827 0.7228045
## [16,] 0.0000000 0.0000000 0.1445609 0.7228045 0.5782436 0.2891218 0.1445609
## [17,] 0.0000000 0.1445609 0.2891218 0.4336827 0.2891218 0.5782436 0.5782436
## [18,] 0.0000000 0.5782436 0.1445609 0.1445609 0.7228045 0.1445609 0.1445609
## [19,] 0.0000000 0.1445609 0.2891218 0.2891218 0.1445609 0.2891218 0.8673654
## [20,] 0.1445609 0.4336827 0.2891218 0.1445609 0.0000000 0.2891218 0.2891218
## [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 0.1445609 0.0000000 0.0000000 0.1445609 0.1445609 0.0000000 0.1445609
## [2,] 0.7228045 0.7228045 0.2891218 0.4336827 0.2891218 0.4336827 0.2891218
## [3,] 0.5782436 1.4456090 0.2891218 0.5782436 0.7228045 0.4336827 0.4336827
## [4,] 0.8673654 1.3010481 1.0119263 0.5782436 0.2891218 0.7228045 0.4336827
## [5,] 0.7228045 1.5901699 1.1564872 1.1564872 0.5782436 0.2891218 1.1564872
## [6,] 1.4456090 1.1564872 1.1564872 0.8673654 0.8673654 0.7228045 0.8673654
## [7,] 2.1684134 1.4456090 1.3010481 1.3010481 1.4456090 1.4456090 0.5782436
## [8,] 2.3129743 1.0119263 1.3010481 1.1564872 0.7228045 1.0119263 0.8673654
## [9,] 1.4456090 1.1564872 1.1564872 1.8792917 1.8792917 1.7347308 0.7228045
## [10,] 1.7347308 1.4456090 2.3129743 1.3010481 0.5782436 1.1564872 1.1564872
## [11,] 1.1564872 1.1564872 1.3010481 1.0119263 0.7228045 0.7228045 1.7347308
## [12,] 1.0119263 1.4456090 1.8792917 1.5901699 1.0119263 1.7347308 0.5782436
## [13,] 0.8673654 1.1564872 1.3010481 1.3010481 1.8792917 1.8792917 1.8792917
## [14,] 0.8673654 1.1564872 0.7228045 1.4456090 1.8792917 0.8673654 1.1564872
## [15,] 0.8673654 1.1564872 0.7228045 1.3010481 1.1564872 1.4456090 2.0238525
## [16,] 0.7228045 0.7228045 1.4456090 1.1564872 1.3010481 0.5782436 2.3129743
## [17,] 0.7228045 0.5782436 1.1564872 0.5782436 1.3010481 0.8673654 1.1564872
## [18,] 0.0000000 0.0000000 0.2891218 0.4336827 1.0119263 2.1684134 0.8673654
## [19,] 0.4336827 0.2891218 0.5782436 0.8673654 0.8673654 0.5782436 0.8673654
## [20,] 0.0000000 0.1445609 0.1445609 0.0000000 0.4336827 0.5782436 0.1445609
## [,15] [,16] [,17] [,18] [,19] [,20]
## [1,] 0.1445609 0.2891218 0.0000000 0.0000000 0.0000000 0.0000000
## [2,] 0.4336827 0.1445609 0.5782436 0.1445609 0.1445609 0.1445609
## [3,] 0.1445609 0.4336827 0.4336827 0.2891218 0.1445609 0.1445609
## [4,] 0.2891218 0.2891218 0.2891218 0.2891218 0.1445609 0.0000000
## [5,] 0.7228045 0.4336827 0.1445609 0.2891218 0.2891218 0.4336827
## [6,] 1.0119263 0.4336827 1.3010481 0.7228045 0.0000000 0.0000000
## [7,] 0.2891218 0.5782436 0.5782436 0.1445609 0.2891218 0.1445609
## [8,] 1.1564872 0.8673654 0.7228045 0.1445609 0.2891218 0.7228045
## [9,] 0.4336827 0.5782436 0.7228045 0.4336827 0.4336827 0.2891218
## [10,] 1.1564872 1.0119263 0.4336827 0.5782436 0.5782436 0.0000000
## [11,] 1.1564872 1.0119263 0.2891218 1.1564872 0.7228045 0.4336827
## [12,] 1.4456090 1.3010481 1.1564872 0.7228045 1.1564872 0.0000000
## [13,] 1.7347308 1.0119263 0.4336827 1.1564872 0.7228045 0.1445609
## [14,] 1.5901699 1.5901699 0.8673654 1.3010481 1.3010481 0.1445609
## [15,] 1.4456090 1.1564872 1.7347308 1.4456090 1.3010481 0.2891218
## [16,] 1.8792917 2.0238525 2.4575352 1.7347308 0.5782436 0.0000000
## [17,] 1.1564872 1.4456090 2.4575352 2.1684134 2.4575352 0.2891218
## [18,] 0.8673654 2.1684134 3.3249006 2.8912179 2.8912179 0.4336827
## [19,] 1.3010481 1.3010481 1.1564872 2.4575352 3.7585833 2.7466570
## [20,] 0.7228045 0.5782436 0.1445609 0.4336827 1.5901699 3.4694615
plot_ly(x = U1, y = U2, z = z_brut, type = "surface")
# II. Copula estimation
# 1. Parametric method
# Histogram and Cullen-Frey diagnostics for each margin
plotdist(X, breaks = 20, histo = TRUE, demp = TRUE)
descdist(X, discrete=FALSE, boot=500)
## summary statistics
## ------
## min: -0.137708 max: 0.1131575
## median: 0.0006757409
## mean: 0.0007878741
## estimated sd: 0.01836405
## estimated skewness: -0.276231
## estimated kurtosis: 8.771136
plotdist(Y, histo = TRUE, demp = TRUE)
descdist(Y, discrete=FALSE, boot=500)
## summary statistics
## ------
## min: -0.1594534 max: 0.132929
## median: 0.0006322637
## mean: 0.000792413
## estimated sd: 0.01673784
## estimated skewness: -0.232189
## estimated kurtosis: 12.04573
# => X and Y do not appear to follow any standard distribution
# NOTE(review): the normal fit below is for Y but the starting values come
# from X (mean(X), sd(X)) — presumably intentional per the original comment,
# but worth confirming.
fit1 <- fitdist(Y, "norm", lower = c(-Inf, 0), start = list(mean = mean(X), sd = sd(X)))
# Goodness-of-fit test against the fitted normal
ks.test(Y, "pnorm", mean = fit1$estimate["mean"], sd = fit1$estimate["sd"])
## Warning in ks.test.default(Y, "pnorm", mean = fit1$estimate["mean"], sd =
## fit1$estimate["sd"]): ties should not be present for the Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: Y
## D = 0.078589, p-value = 2.887e-15
## alternative hypothesis: two-sided
# Y does not follow the normal distribution
# 2. Semi-parametric approach
# Select the best-fitting copula family for the pseudo-observations
selectedCopula <- BiCopSelect(data_rank[,1], data_rank[,2], familyset = NA)
selectedCopula
## Bivariate copula: t (par = 0.59, par2 = 2.92, tau = 0.4)
# Student copula with correlation parameter 0.59
model_Copula <- tCopula(param = 0.59, dim = 2, dispstr="un", df=6)
fitstudent <- fitCopula(model_Copula, data_rank, method = "ml")
fitstudent
## Call: fitCopula(model_Copula, data = data_rank, ... = pairlist(method = "ml"))
## Fit based on "maximum likelihood" and 2767 2-dimensional observations.
## Copula: tCopula
## rho.1 df
## 0.5918 2.9221
## The maximized loglikelihood is 700.3
## Optimization converged
rho = coef(fitstudent)[1] #the estimated correlation parameter
#rho = 0.5917759
df = coef(fitstudent)[2] #the degrees of freedom (note: name shadows stats::df)
#df = 2.922124
#plot_3d <- persp(tCopula(dim=2,rho,df=df),dCopula, col ="salmon")
# 3D visualisation under the Student copula
# NOTE(review): the surface below uses model_Copula (param = 0.59, df = 6 as
# initially specified), not the fitted rho/df extracted above — confirm intended.
U1 <- seq(0,1,length.out =20)
U2 <- seq(0,1,length.out =20)
grid <- expand.grid(U1 = U1, U2 = U2)
z_student <- c()
for (i in (1:nrow(grid))){
z_student <- append(z_student, dCopula(c(grid[i,1], grid[i,2]), model_Copula))
}
z_student <- matrix(z_student, ncol = length(U1), nrow = length(U1))
plot_ly(x = U1, y = U2, z = z_student, type = "surface")
# Goodness-of-fit test (parametric bootstrap; style: prefer TRUE over T)
gof.t <- gofCopula(copula = ellipCopula(family = c("t"), df =6, df.fixed = T),x = data_rank)
## Warning in .gofPB(copula, x, N = N, method = method, estim.method =
## estim.method, : argument 'ties' set to TRUE
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
# Sn statistic p-value ~ 0.0005 (< 0.05): the bootstrap test rejects the
# t-copula fit at conventional significance levels.
gof.t
##
## Parametric bootstrap-based goodness-of-fit test of t-copula, dim. d =
## 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.077476, parameter = 0.61355, p-value = 0.0004995
# Because the variables exhibit both tail dependencies (or at least lower tail dependency), we will try the Clayton copula to model the dependence between these two variables.
# The graphs indicate tail dependence between the two variables, so we pick
# copula families able to represent both tail dependencies, or at least
# lower tail dependence.
# 1.1 Clayton copula:
Clayton.fit <- fitCopula(claytonCopula(), data = data_rank, method = 'mpl')
Clayton.fit
## Call: fitCopula(claytonCopula(), data = data_rank, ... = pairlist(method = "mpl"))
## Fit based on "maximum pseudo-likelihood" and 2767 2-dimensional observations.
## Copula: claytonCopula
## alpha
## 1.335
## The maximized loglikelihood is 540.8
## Optimization converged
# Instantiate a Clayton copula at the fitted parameter
Clayton.obs <- claytonCopula(param = 1.335)
Clayton.obs
## Clayton copula, dim. d = 2
## Dimension: 2
## Parameters:
## alpha = 1.335
# result
# alpha
# 1.335
# The maximized loglikelihood is 540.8
# Optimization converged
# 3D surface of the Clayton density on the grid
z_clayton <- c()
for (i in (1:nrow(grid))){
z_clayton <- append(z_clayton, dCopula(c(grid[i,1], grid[i,2]), Clayton.obs))
}
z_clayton <- matrix(z_clayton, ncol = length(U1), nrow = length(U1))
plot_ly(x = U1, y = U2, z = z_clayton, type = "surface")
# Goodness-of-fit test (parametric bootstrap)
gof.Clayton <- gofCopula( Clayton.obs, data_rank)
## Warning in .gofPB(copula, x, N = N, method = method, estim.method =
## estim.method, : argument 'ties' set to TRUE
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
## Warning in fitCopula.ml(copula, u = data, method = method, start = start, :
## possible convergence problem: optim() gave code=52
# Small p-value: the Clayton copula is also rejected by the Sn test.
gof.Clayton
##
## Parametric bootstrap-based goodness-of-fit test of Clayton copula, dim.
## d = 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.41974, parameter = 1.3347, p-value = 0.0004995
#3.Non parametric:
#3.1 Kernel Estimation
#3.1.1 Gaussian Kernel without reflection method
# Define Gaussian kernel function
# Bivariate Gaussian (product) kernel with common bandwidth h, evaluated at
# displacement (u, v); vectorized over u and v.
gaussian_kernel <- function(u, v, h) {
  two_h_sq <- 2 * h^2
  exp(-(u^2 + v^2) / two_h_sq) / (pi * two_h_sq)
}
# Plain Gaussian-kernel density estimate at (u, v) (vectors allowed),
# averaging the kernel over all rows of `data` (a 2-column matrix of
# pseudo-observations). No boundary correction is applied here.
kernel_density_estimation <- function(u, v, data, h) {
  n_obs <- nrow(data)
  total <- 0
  for (row in seq_len(n_obs)) {
    total <- total + gaussian_kernel(u - data[row, 1], v - data[row, 2], h)
  }
  total / n_obs
}
# Log-likelihood of the Gaussian kernel estimate evaluated at the sample
# points themselves, for a given bandwidth h.
loglik_func_kernel <- function(data, h) {
  dens <- kernel_density_estimation(data[, 1], data[, 2], data, h)
  sum(log(dens))
}
# Bandwidth scan: log-likelihood of the plain Gaussian KDE over a grid of h.
# NOTE(review): test_h starts at 0, so the first bandwidth divides by zero in
# the kernel and yields a non-finite log-likelihood — confirm intended.
test_h <- seq(0,0.3, length.out = 10 )
log_lik_ker <- c()
for (i in test_h){
log_lik_ker <- append(log_lik_ker, loglik_func_kernel(data_rank, i))
}
plot(test_h, log_lik_ker)
par(mfrow = c(2, 2))
# Density surfaces for several candidate bandwidths
plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.01), nrow = 20, ncol = 20), type = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.05), nrow = 20, ncol = 20), type = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.07), nrow = 20, ncol = 20), type = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_estimation(grid$U1, grid$U2, data_rank, 0.1), nrow = 20, ncol = 20), type = "surface")
#3.1.2.Gaussian Kernel reflection method
# Boundary-reflection augmentation for copula KDE: tiles the sample with its
# reflections about the edges of the unit square, producing 9 copies in the
# order (2-x, x, -x) crossed with (2-y, y, -y) — identical to looping the
# reflection indices i, j over (1, 0, -1).
#
# @param data two-column matrix/data frame of points in [0, 1]^2.
# @return data frame with columns col1, col2 and 9 * nrow(data) rows.
reflection <- function(data) {
  flips <- list(
    function(z) 2 - z,  # reflect about the upper boundary (1)
    function(z) z,      # original coordinates
    function(z) -z      # reflect about the lower boundary (0)
  )
  pieces <- vector("list", 9L)
  k <- 0L
  for (fx in flips) {
    for (fy in flips) {
      k <- k + 1L
      pieces[[k]] <- data.frame(col1 = fx(data[, 1]), col2 = fy(data[, 2]))
    }
  }
  do.call(rbind, pieces)
}
# Reflect the pseudo-observations about the unit-square boundaries and
# compare the augmented cloud with the original data.
augmented_data <- reflection(data_rank)
par(mfrow = c(1, 2))
plot(augmented_data, main = "Data reflected", xlab = 'U', ylab = 'V')
plot(data_rank, main = "Data" , xlab = 'U', ylab = 'V')
# Reflection-corrected Gaussian KDE on the unit square.
#
# Evaluates the Gaussian kernel estimate at (u, v) over the reflected
# (augmented) sample passed in `data` — 9 copies of the original points —
# then multiplies by 9 so the density renormalizes with respect to the
# original sample size restricted to [0, 1]^2.
#
# Fix: the function previously ignored its `data` argument and read the
# global `augmented_data`; the plot calls below even passed an undefined
# `combined_data`, which only "worked" because R evaluates arguments lazily.
# The function now honors `data`, and all call sites pass augmented_data.
kernel_density_reflection_estimation <- function(u, v, data, h) {
  n <- nrow(data)
  density <- 0
  for (i in seq_len(n)) {
    density <- density + gaussian_kernel(u - data[i, 1], v - data[i, 2], h)
  }
  density * 9 / n
}
# Bandwidth scan for the reflection-corrected estimator.
test_h <- seq(0,0.3, length.out = 10 )
log_lik_ker <- c()
for (i in test_h){
log_lik_ker <- append(log_lik_ker, sum(log(kernel_density_reflection_estimation(data_rank[,1], data_rank[,2],augmented_data,i))))
}
plot(test_h, log_lik_ker)
# Density surfaces for several bandwidths, evaluated on the augmented sample.
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.01), nrow = 20, ncol =20)
, type = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.05), nrow = 20, ncol =20)
, type = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.07), nrow = 20, ncol =20)
, type = "surface")
plot_ly(x = U1, y = U2, z = matrix(kernel_density_reflection_estimation(grid$U1, grid$U2, augmented_data, 0.1), nrow = 20, ncol =20)
, type = "surface")
loglik_reflected <- sum(log(kernel_density_reflection_estimation(data_rank[,1], data_rank[,2],augmented_data,0.1)))
loglik_reflected
## [1] 565.5902
#3.2 Beta Kernel Estimation
# Bivariate beta kernel function
# Beta density kernel evaluated at x with shape parameters alpha and beta
# (equivalent to dbeta(x, alpha, beta)). The parameter named `beta` shadows
# base::beta only as a value; `beta(alpha, beta)` still resolves to the
# base beta function.
bivariate_beta_kernel <- function(x, alpha, beta) {
  x^(alpha - 1) * (1 - x)^(beta - 1) / beta(alpha, beta)
}
# Bivariate beta kernel density estimation function
# Beta-kernel copula density estimate at (u, v) (vectors allowed) from the
# pseudo-observations in `data`. The beta kernel adapts its shape near the
# boundaries of [0, 1], avoiding the boundary bias of a plain Gaussian KDE.
bivariate_beta_kernel_density <- function(data, u, v, bandwidth) {
  n_obs <- nrow(data)
  shape1_u <- u / bandwidth + 1
  shape2_u <- (1 - u) / bandwidth + 1
  shape1_v <- v / bandwidth + 1
  shape2_v <- (1 - v) / bandwidth + 1
  acc <- 0
  for (i in seq_len(n_obs)) {
    acc <- acc +
      bivariate_beta_kernel(data[i, 1], shape1_u, shape2_u) *
      bivariate_beta_kernel(data[i, 2], shape1_v, shape2_v)
  }
  acc / n_obs
}
# Bandwidth scan for the beta-kernel estimator.
# NOTE(review): test_h starts at 0, giving non-finite shape parameters at the
# first grid point — confirm intended.
test_h <- seq(0,0.3, length.out = 10 )
log_lik_ker <- c()
for (i in test_h){
log_lik_ker <- append(log_lik_ker, sum(log(bivariate_beta_kernel_density(data_rank, data_rank[,1], data_rank[,2], i))))
}
plot(test_h, log_lik_ker)
# Beta-kernel density surfaces for several bandwidths
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.01), nrow = length(U1), ncol = length(U1)), type = "surface")
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.03), nrow = length(U1), ncol = length(U1)), type = "surface")
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.05), nrow = length(U1), ncol = length(U1)), type = "surface")
plot_ly(x = U1, y = U2 ,z = matrix(bivariate_beta_kernel_density(data_rank, grid$U1, grid$U2, 0.1), nrow = length(U1), ncol = length(U1)), type = "surface")
#plot_ly(x = U1, y = U2 ,z = z_beta, type = "contour")
# 3. Model comparison
# Bandwidths retained for each kernel estimator (read off the scans above)
h_gaus <- 0.07
h_reflec <- 0.05
h_beta <- 0.02
# Pointwise squared error between the empirical box-count copula density and
# a chosen estimated density, used for MISE computation via adaptIntegrate.
#
# @param x numeric vector c(u, v), a point in the unit square.
# @param hypothetical_copula estimator selector:
#   'gker'    - plain Gaussian KDE (bandwidth h_gaus)
#   'gkerf'   - reflection-corrected Gaussian KDE (bandwidth h_reflec)
#   'beta'    - beta-kernel estimate (bandwidth h_beta)
#   'student' - fitted t copula density
#   anything else - fitted Clayton copula density
# Relies on globals: F_n, G_n, data_rank, augmented_data, h_*, fitstudent,
# Clayton.obs.
squared_error <- function(x, hypothetical_copula) {
  u <- x[1]
  v <- x[2]
  empirical_density <- empirique_Copula(F_n, G_n, u, v, 0.05)
  if (hypothetical_copula == 'gker'){
    hypothetical_density <- kernel_density_estimation(u, v, data_rank, h_gaus)
  } else if (hypothetical_copula == 'gkerf'){
    # Fix: the reflection estimator must see the augmented (reflected)
    # sample, not data_rank — the original only behaved because the
    # estimator's data argument was ignored in favor of the global.
    hypothetical_density <- kernel_density_reflection_estimation(u, v, augmented_data, h_reflec)
  } else if (hypothetical_copula == 'beta'){
    hypothetical_density <- bivariate_beta_kernel_density(data_rank, u, v, h_beta)
  } else if (hypothetical_copula == 'student'){
    hypothetical_density <- dCopula(c(u,v), copula = fitstudent@copula)
  } else {
    hypothetical_density <- dCopula(c(u,v), copula = Clayton.obs)
  }
  return((empirical_density - hypothetical_density)^2)
}
# Approximate MISE (maxEval = 100) of each estimator against the empirical
# box-count copula density, integrated over the unit square.
mise_result <- adaptIntegrate(squared_error,lower = c(0, 0),
upper = c(1, 1),
hypothetical_copula = 'gker', maxEval = 100)
MISE_compare <- data.frame()
for (i in c('gker', 'gkerf','beta', 'student', 'clayton')){
mise <- adaptIntegrate(squared_error,lower = c(0, 0),upper = c(1, 1),hypothetical_copula = i , maxEval=100)
# NOTE(review): c(i, mise$integral) coerces the MISE value to character, so
# the bar chart below plots a character column — confirm this is acceptable.
MISE_compare <- rbind(MISE_compare, c(i, mise$integral))
}
plot_ly(x = MISE_compare[,1], y = MISE_compare[,2], type = 'bar')